us <- read.csv("https://raw.githubusercontent.com/JaclynCoate/6373_Time_Series/master/TermProject/Data/USdaily.7.26.20.csv", header = T, strip.white = T)
us <- transform(us, date = as.Date(as.character(date), "%Y%m%d"))
us <- subset(us, select = -c(states, dateChecked, hospitalized, lastModified, total, posNeg, totalTestResultsIncrease, hash))
us[is.na(us)] <- 0
#Selecting only those dates with reported current hospitilizations
us <- dplyr::slice(us,1:132)
us = us[order(as.Date(us$date, format = "%Y%m%d")),]
head(us)
## date positive negative pending hospitalizedCurrently
## 132 2020-03-17 11928 63104 1687 325
## 131 2020-03-18 15099 84997 2526 416
## 130 2020-03-19 19770 108407 3016 617
## 129 2020-03-20 26025 138814 3330 1042
## 128 2020-03-21 32910 177262 3468 1436
## 127 2020-03-22 42169 213476 2842 2155
## hospitalizedCumulative inIcuCurrently inIcuCumulative
## 132 55 0 0
## 131 67 0 0
## 130 85 0 0
## 129 108 0 0
## 128 2020 0 0
## 127 3023 0 0
## onVentilatorCurrently onVentilatorCumulative recovered death
## 132 0 0 0 122
## 131 0 0 0 153
## 130 0 0 0 199
## 129 0 0 0 267
## 128 0 0 0 328
## 127 0 0 0 471
## totalTestResults deathIncrease hospitalizedIncrease negativeIncrease
## 132 75032 22 13 13707
## 131 100096 31 12 21893
## 130 128177 46 18 23410
## 129 164839 68 23 30407
## 128 210172 61 1912 38448
## 127 255645 143 1003 36214
## positiveIncrease
## 132 3613
## 131 3171
## 130 4671
## 129 6255
## 128 6885
## 127 9259
It is difficult to assume stationarity for this data due to multiple factors. We are working under the assumption that COVID is a novel virus and cases as well as hospitalizations will eventually return to zero. This being said our current modeling techniques do things such as return to the median or mimic the previously seen trends. Also, we see a heavy wandering trend in both new cases and hospitalization would be dependent on this as well as time. We will review the data and see what, if any, non-stationary components reveal themselves and model the data accordingly.
Traits:
ggplot(data = us, aes(x=date, y=hospitalizedCurrently))+
geom_line(color="orange")+
labs(title = "Current COVID Hospitalized Cases US", y = "Thousands", x = "") +
theme_fivethirtyeight()
Realization:
ACF:
Spectral Density:
plotts.sample.wge(us$hospitalizedCurrently, lag.max = 100)
## $autplt
## [1] 1.000000000 0.967247046 0.928472445 0.883791745 0.834039153
## [6] 0.779901780 0.722099406 0.660670233 0.595507232 0.527652159
## [11] 0.459798098 0.392707355 0.325070497 0.257394633 0.190076427
## [16] 0.123107742 0.058045025 -0.005592108 -0.062510518 -0.114255855
## [21] -0.163281121 -0.206979530 -0.242176807 -0.275097030 -0.300626756
## [26] -0.323018458 -0.340862401 -0.357234304 -0.371125261 -0.380223777
## [31] -0.387733922 -0.393904174 -0.399188337 -0.403702529 -0.407535556
## [36] -0.409058093 -0.406032310 -0.402291057 -0.397307333 -0.392192952
## [41] -0.385158345 -0.377444480 -0.367999137 -0.357234818 -0.345390268
## [46] -0.333657101 -0.320405088 -0.307093151 -0.293895002 -0.279566463
## [51] -0.263105425 -0.246363949 -0.229539058 -0.213137515 -0.196728805
## [56] -0.180700291 -0.164151991 -0.145439160 -0.126569221 -0.107984741
## [61] -0.090254314 -0.072242265 -0.054130291 -0.034756123 -0.014095709
## [66] 0.007187366 0.028500485 0.048915792 0.068729196 0.088785284
## [71] 0.109636722 0.130889993 0.152506791 0.173725394 0.193391213
## [76] 0.211419137 0.228441065 0.245143327 0.260841689 0.274513731
## [81] 0.286671443 0.296525403 0.304596844 0.311148685 0.317183678
## [86] 0.321808447 0.324325398 0.323899788 0.321013182 0.315477248
## [91] 0.307881329 0.298203689 0.286925511 0.272658784 0.256479904
## [96] 0.236600281 0.213678525 0.188829126 0.163809020 0.138368627
## [101] 0.111541632
##
## $freq
## [1] 0.007575758 0.015151515 0.022727273 0.030303030 0.037878788
## [6] 0.045454545 0.053030303 0.060606061 0.068181818 0.075757576
## [11] 0.083333333 0.090909091 0.098484848 0.106060606 0.113636364
## [16] 0.121212121 0.128787879 0.136363636 0.143939394 0.151515152
## [21] 0.159090909 0.166666667 0.174242424 0.181818182 0.189393939
## [26] 0.196969697 0.204545455 0.212121212 0.219696970 0.227272727
## [31] 0.234848485 0.242424242 0.250000000 0.257575758 0.265151515
## [36] 0.272727273 0.280303030 0.287878788 0.295454545 0.303030303
## [41] 0.310606061 0.318181818 0.325757576 0.333333333 0.340909091
## [46] 0.348484848 0.356060606 0.363636364 0.371212121 0.378787879
## [51] 0.386363636 0.393939394 0.401515152 0.409090909 0.416666667
## [56] 0.424242424 0.431818182 0.439393939 0.446969697 0.454545455
## [61] 0.462121212 0.469696970 0.477272727 0.484848485 0.492424242
## [66] 0.500000000
##
## $db
## [1] 8.4297671 13.7645189 12.5249640 8.3502117 4.2830812
## [6] -0.7743908 -1.6306179 -1.6820655 -1.1397181 -1.9660002
## [11] -3.8470478 -4.5601791 -5.7979845 -6.6448260 -8.1613031
## [16] -7.7172776 -7.0289454 -6.7231288 -11.3437841 -7.3292891
## [21] -8.5570448 -9.8123688 -12.3352898 -12.0470153 -10.4188757
## [26] -11.1989248 -11.5484834 -11.3001319 -11.8583444 -13.4758586
## [31] -13.5752424 -13.1601144 -13.4822098 -11.8751077 -12.0098408
## [36] -11.6652933 -14.1857608 -18.7098443 -15.1452524 -14.2793937
## [41] -13.6312562 -13.4537203 -13.8449147 -14.8421169 -15.8144787
## [46] -16.3497508 -16.1392884 -14.5229548 -13.9096257 -14.2979673
## [51] -15.1762427 -16.9533791 -16.6240685 -15.5004175 -14.8930293
## [56] -15.3404690 -17.3904939 -15.6412620 -14.5937114 -14.5227461
## [61] -16.6299676 -17.9885318 -16.8369446 -17.2106857 -15.1218462
## [66] -13.7373278
##
## $dbz
## [1] 10.8000075 10.4225032 9.7877656 8.8879309 7.7133502
## [6] 6.2551734 4.5113555 2.5000875 0.2870472 -1.9727273
## [11] -4.0185421 -5.5981361 -6.6776527 -7.4337186 -8.0352652
## [16] -8.5435576 -8.9672493 -9.3368790 -9.7184801 -10.1759984
## [21] -10.7327082 -11.3585971 -11.9855900 -12.5448786 -13.0053273
## [26] -13.3805586 -13.7009145 -13.9838700 -14.2295004 -14.4361103
## [31] -14.6143036 -14.7849900 -14.9660787 -15.1624073 -15.3669935
## [36] -15.5703444 -15.7685492 -15.9634820 -16.1567949 -16.3451059
## [41] -16.5214784 -16.6812308 -16.8255589 -16.9589464 -17.0832210
## [46] -17.1948352 -17.2887237 -17.3653174 -17.4335644 -17.5062347
## [51] -17.5910578 -17.6848979 -17.7756255 -17.8505813 -17.9054045
## [56] -17.9463455 -17.9845791 -18.0276392 -18.0745244 -18.1172420
## [61] -18.1468027 -18.1590805 -18.1567596 -18.1470189 -18.1375821
## [66] -18.1338177
Since we are seeing heavy wandering behavior we will use overfit tables to see if we can surface any (1-B) factors that have roots very near the unit circle.
est.ar.wge(us$hospitalizedCurrently,p=6,type='burg')
##
## Coefficients of Original polynomial:
## 1.2113 0.0324 -0.0917 -0.0697 0.0013 -0.1010
##
## Factor Roots Abs Recip System Freq
## 1-1.9283B+0.9354B^2 1.0308+-0.0812i 0.9672 0.0125
## 1+0.9678B+0.3408B^2 -1.4200+-0.9583i 0.5838 0.4055
## 1-0.2507B+0.3168B^2 0.3957+-1.7322i 0.5628 0.2143
##
##
## $phi
## [1] 1.211269271 0.032440618 -0.091744816 -0.069675720 0.001316006
## [6] -0.100966743
##
## $res
## [1] -292.92920 42.23585 -92.70196 -102.02123 -334.70493
## [6] -145.22402 -403.96302 34.46467 -83.49135 1318.62578
## [11] 1377.16436 -880.96039 -636.85824 -375.11269 230.53004
## [16] 589.34824 -162.87147 523.10092 2268.40156 -856.96328
## [21] 1307.41009 4905.29484 -2079.33995 2125.99214 -1561.57046
## [26] -286.55877 -2910.62575 -721.73533 2310.48679 -741.05699
## [31] -1458.33820 -885.25596 -1020.67917 -806.18397 1240.82698
## [36] 3855.28027 -594.27955 -332.98691 -1561.70454 599.16915
## [41] -668.18241 910.28844 838.47931 588.30675 -699.57648
## [46] 648.28101 -290.27821 -792.40531 664.36263 1721.18828
## [51] -247.47143 -655.56939 -1027.72246 -316.65232 -555.31836
## [56] 670.18033 2208.79087 -46.31492 -741.64210 -711.16638
## [61] -345.85017 -802.33939 1055.50075 1019.45748 423.37585
## [66] -263.76408 -870.20100 -934.79262 -213.25332 762.11152
## [71] 755.17733 958.19565 -257.19616 -1107.16022 -1097.48478
## [76] -392.31146 25.12350 266.29333 -150.37710 11.46933
## [81] -24.36326 -230.98962 -270.72490 81.75412 189.17778
## [86] -227.80796 -1100.72037 -289.22665 -380.30212 -216.89601
## [91] 321.96954 641.36609 239.90315 -394.37919 -45.80287
## [96] -932.43620 101.31456 444.74934 1155.39447 225.84784
## [101] -36.98998 -963.47841 90.12262 -630.67985 649.45247
## [106] 1114.69662 348.88102 78.19229 -682.63199 -511.33045
## [111] -33.55819 480.50704 1404.41245 468.10554 -172.16756
## [116] 6696.26244 -2026.25073 -1457.43811 -294.49345 416.89154
## [121] -780.81322 714.03055 -299.92450 -595.44881 59.38061
## [126] 536.60736 999.48047 240.91788 133.31315 -233.45378
## [131] -301.48621 -282.07861
##
## $avar
## [1] 1358148
##
## $aic
## [1] 14.22769
##
## $aicc
## [1] 15.25171
##
## $bic
## [1] 14.38057
Once the data has been differenced we see something that looks much closer to a stationary data set. However, we have also surfaced what appears to be a small seasonality component. We see the ACF have higher spikes surface at 7 and 14, which would lead us to believe there is a 7 day seasonal component.
us.diff = artrans.wge(us$hospitalizedCurrently, phi.tr = 1, lag.max = 100)
acf(us.diff)
Above we have surfaced what appears to be a 7 day seasonality trend. We will now transform the data for the s=7.
us.diff.seas = artrans.wge(us.diff,phi.tr = c(0,0,0,0,0,0,1), lag.max = 100)
When we diagnose the the best models to use for our stationary data set we see the R AIC5 function selects an AIC ARMA(5,1) model while the BIC selects a AR(2). The AR(2) model is consistent with our pseudo-cyclic data as well as the dampening cyclical sample autocorrelations that are produced by the transformed data. The ARMA(5,1) could also produce these same traits. We will move forward and compare these two models.
aic5.wge(us.diff.seas)
## ---------WORKING... PLEASE WAIT...
##
##
## Five Smallest Values of aic
## p q aic
## 17 5 1 14.57449
## 10 3 0 14.60386
## 13 4 0 14.61477
## 6 1 2 14.61527
## 8 2 1 14.61581
aic5.wge(us.diff.seas,type = "bic")
## ---------WORKING... PLEASE WAIT...
##
##
## Five Smallest Values of bic
## p q bic
## 7 2 0 14.68775
## 10 3 0 14.69484
## 6 1 2 14.70625
## 8 2 1 14.70679
## 3 0 2 14.72173
Both of the Junge Box test show us that we reject the H null with pvalues that are < 0.05 alpha significance level.
ljung.wge(us.diff.seas)$pval
## Obs 0.2522573 0.4096169 0.2803795 0.1452644 0.1021298 0.1354335 -0.2827722 0.07264095 -0.09804601 -0.01062144 0.004067316 -0.04130786 -0.03854601 -0.02866098 -0.09193388 -0.05167819 -0.1056372 -0.1193326 -0.08827532 -0.1061183 -0.1054081 -0.036613 -0.0654078 -0.03489691
## [1] 1.859792e-06
ljung.wge(us.diff.seas, K=48)$pval
## Obs 0.2522573 0.4096169 0.2803795 0.1452644 0.1021298 0.1354335 -0.2827722 0.07264095 -0.09804601 -0.01062144 0.004067316 -0.04130786 -0.03854601 -0.02866098 -0.09193388 -0.05167819 -0.1056372 -0.1193326 -0.08827532 -0.1061183 -0.1054081 -0.036613 -0.0654078 -0.03489691 0.02370281 -0.02743256 -0.02153882 -0.03584166 -0.1039903 -0.02877203 -0.03843455 -0.07298731 -0.03237146 -0.02495354 0.008256594 0.02396111 -0.06576515 -0.04980251 -0.04736059 -0.04415211 -0.03591561 -0.04413551 -0.03016308 0.03982533 0.008384104 0.02503231 0.03614677 0.01834679
## [1] 0.003990771
AIC Phi and Theta Estimates
est.us.diff.seasAIC = est.arma.wge(us.diff.seas, p = 5, q=1)
##
## Coefficients of Original polynomial:
## -0.6097 0.4850 0.5047 0.0643 -0.2269
##
## Factor Roots Abs Recip System Freq
## 1+0.9305B -1.0747 0.9305 0.5000
## 1+0.9405B+0.5775B^2 -0.8143+-1.0337i 0.7599 0.3562
## 1-1.2613B+0.4223B^2 1.4934+-0.3713i 0.6498 0.0388
##
##
mean(us$hospitalizedCurrently)
## [1] 39625.17
BIC Phi Estiamtes
est.us.diff.seasBIC = est.arma.wge(us.diff.seas, p = 2)
##
## Coefficients of Original polynomial:
## 0.1594 0.3740
##
## Factor Roots Abs Recip System Freq
## 1-0.6964B 1.4359 0.6964 0.0000
## 1+0.5370B -1.8622 0.5370 0.5000
##
##
mean(us$hospitalizedCurrently)
## [1] 39625.17
shortARMA <- fore.aruma.wge(us$hospitalizedCurrently, phi = est.us.diff.seasAIC$phi, theta = est.us.diff.seasAIC$theta, d= 1, s = 7, n.ahead = 7, lastn = F, limits = T)
est.us.diff.seasAIC$aic
## [1] 14.57449
phis = est.us.diff.seasAIC$phi
thetas = est.us.diff.seasAIC$theta
trainingSize = 24
horizon = 7
ASEHolder = numeric()
for( i in 1:(124-(trainingSize + horizon) + 1))
{
forecasts = fore.aruma.wge(us$hospitalizedCurrently[i:(i+(trainingSize-1))],phi = phis, theta = thetas, s = 7, d = 1, n.ahead = horizon)
ASE = mean((us$hospitalizedCurrently[(trainingSize+i):(trainingSize+ i + (horizon) - 1)] - shortARMA$f)^2)
ASEHolder[i] = ASE
}
invisible(ASEHolder)
hist(ASEHolder)
WindowedASE = mean(ASEHolder)
summary(ASEHolder)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1272430 29588508 333408849 374722941 673776716 942860862
WindowedASE
## [1] 374722941
fs = fore.aruma.wge(us$hospitalizedCurrently[i:(i+(trainingSize+horizon)-1)],phi = phis, theta = thetas, s = 7, d = 1,n.ahead = 7, lastn = TRUE)
ASE = mean((us$hospitalizedCurrently[(i+trainingSize):(i+(trainingSize+horizon)-1)] - fs$f )^2)
ASE
## [1] 73605156
longARMA <- fore.aruma.wge(us$hospitalizedCurrently, phi = est.us.diff.seasAIC$phi, theta = est.us.diff.seasAIC$theta, d= 1, s = 7, n.ahead = 90, lastn = F, limits = F)
phis = est.us.diff.seasAIC$phi
thetas = est.us.diff.seasAIC$theta
trainingSize = 24
horizon = 90
ASEHolder = numeric()
for( i in 1:(124-(trainingSize + horizon) + 1))
{
forecasts = fore.aruma.wge(us$hospitalizedCurrently[i:(i+(trainingSize-1))],phi = phis, theta = thetas, s = 7, d = 1,n.ahead = horizon)
ASE = mean((us$hospitalizedCurrently[(trainingSize+i):(trainingSize+ i + (horizon) - 1)] - longARMA$f)^2)
ASEHolder[i] = ASE
}
invisible(ASEHolder)
hist(ASEHolder)
WindowedASE = mean(ASEHolder)
summary(ASEHolder)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 243601762 248462559 254633109 254917886 261033217 267272736
WindowedASE
## [1] 254917886
fs = fore.aruma.wge(us$hospitalizedCurrently[i:(i+(trainingSize+horizon)-1)],phi = phis, theta = thetas, s = 7, d = 1, n.ahead = 90, lastn = T)
ASE = mean((us$hospitalizedCurrently[(i+trainingSize):(i+(trainingSize+horizon)-1)] - fs$f )^2)
ASE
## [1] 108278159
shortAR <- fore.aruma.wge(us$hospitalizedCurrently, phi = est.us.diff.seasBIC$phi, d=1, s=7, n.ahead = 7, lastn = FALSE, limits = FALSE)
est.us.diff.seasBIC$aic
## [1] 14.61952
phis = est.us.diff.seasBIC$phi
thetas = est.us.diff.seasBIC$theta
trainingSize = 24
horizon = 7
ASEHolder = numeric()
for( i in 1:(124-(trainingSize + horizon) + 1))
{
forecasts = fore.aruma.wge(us$hospitalizedCurrently[i:(i+(trainingSize-1))],phi = phis, theta = thetas, s = 7, d = 1, n.ahead = horizon)
ASE = mean((us$hospitalizedCurrently[(trainingSize+i):(trainingSize+ i + (horizon) - 1)] - shortAR$f)^2)
ASEHolder[i] = ASE
}
invisible(ASEHolder)
hist(ASEHolder)
WindowedASE = mean(ASEHolder)
summary(ASEHolder)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1637445 31817714 341701004 382068707 685537393 956760251
WindowedASE
## [1] 382068707
fs = fore.aruma.wge(us$hospitalizedCurrently[i:(i+(trainingSize+horizon)-1)],phi = phis, theta = thetas, s = 7, d = 1,n.ahead = 7, lastn = TRUE)
ASE = mean((us$hospitalizedCurrently[(i+trainingSize):(i+(trainingSize+horizon)-1)] - fs$f )^2)
ASE
## [1] 60809514
longAR <- fore.aruma.wge(us$hospitalizedCurrently, phi = est.us.diff.seasBIC$phi, s= 7, d = 1, n.ahead = 90, lastn = FALSE, limits = FALSE)
ASElongAR = mean((us$hospitalizedCurrently[(124-90+1):124]-longAR$f)^2)
ASElongAR
## [1] 293286598
phis = est.us.diff.seasBIC$phi
thetas = est.us.diff.seasBIC$theta
trainingSize = 24
horizon = 90
ASEHolder = numeric()
for( i in 1:(124-(trainingSize + horizon) + 1))
{
forecasts = fore.aruma.wge(us$hospitalizedCurrently[i:(i+(trainingSize-1))],phi = phis, theta = thetas, s = 7, d = 1,n.ahead = horizon)
ASE = mean((us$hospitalizedCurrently[(trainingSize+i):(trainingSize+ i + (horizon) - 1)] - longAR$f)^2)
ASEHolder[i] = ASE
}
ASEHolder
## [1] 273011817 275072396 276591563 278271604 280408457 282811057 285096031
## [8] 287205763 289262672 291232189 293286598
hist(ASEHolder)
WindowedASE = mean(ASEHolder)
summary(ASEHolder)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 273011817 277431584 282811057 282931831 288234217 293286598
WindowedASE
## [1] 282931831
fs = fore.aruma.wge(us$hospitalizedCurrently[i:(i+(trainingSize+horizon)-1)],phi = phis, theta = thetas, s = 7, d = 1,n.ahead = 90, lastn = TRUE)
ASE = mean((us$hospitalizedCurrently[(i+trainingSize):(i+(trainingSize+horizon)-1)] - fs$f )^2)
ASE
## [1] 185865366
library(nnfor)
## Loading required package: forecast
## Registered S3 method overwritten by 'xts':
## method from
## as.zoo.xts zoo
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
## Registered S3 methods overwritten by 'forecast':
## method from
## fitted.fracdiff fracdiff
## residuals.fracdiff fracdiff
head(us)
## date positive negative pending hospitalizedCurrently
## 132 2020-03-17 11928 63104 1687 325
## 131 2020-03-18 15099 84997 2526 416
## 130 2020-03-19 19770 108407 3016 617
## 129 2020-03-20 26025 138814 3330 1042
## 128 2020-03-21 32910 177262 3468 1436
## 127 2020-03-22 42169 213476 2842 2155
## hospitalizedCumulative inIcuCurrently inIcuCumulative
## 132 55 0 0
## 131 67 0 0
## 130 85 0 0
## 129 108 0 0
## 128 2020 0 0
## 127 3023 0 0
## onVentilatorCurrently onVentilatorCumulative recovered death
## 132 0 0 0 122
## 131 0 0 0 153
## 130 0 0 0 199
## 129 0 0 0 267
## 128 0 0 0 328
## 127 0 0 0 471
## totalTestResults deathIncrease hospitalizedIncrease negativeIncrease
## 132 75032 22 13 13707
## 131 100096 31 12 21893
## 130 128177 46 18 23410
## 129 164839 68 23 30407
## 128 210172 61 1912 38448
## 127 255645 143 1003 36214
## positiveIncrease
## 132 3613
## 131 3171
## 130 4671
## 129 6255
## 128 6885
## 127 9259
usTrain.nn = ts(us$hospitalizedCurrently[1:122])
us.nn.fit = mlp(usTrain.nn, outplot = T, comb = "mean", m=7, reps = 50)
plot(us.nn.fit)
us.nn.fit.fore = forecast(us.nn.fit, h=10)
plot(us.nn.fit.fore)
plot(us$hospitalizedCurrently[123:132], type = "l", ylim = c(55000, 80000))
lines(seq(1:10), us.nn.fit.fore$mean, col = "blue")
ASEus.nn.fit.fore = mean((us$hospitalizedCurrently[123:132]-us.nn.fit.fore$mean)^2)
ASEus.nn.fit.fore
## [1] 111935470